Multiple Time Series Forecasting with LSTM Model
Data Source: Chen, D. (2015). Online Retail [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C5BW33.
Retrived from https://archive.ics.uci.edu/dataset/352/online+retail
Importing Library¶
In [5]:
# Mount Google Drive to access files
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In [6]:
%cd /content/drive/MyDrive/Multiple Forecastin Time Series LSTM
/content/drive/MyDrive/Multiple Forecastin Time Series LSTM
In [7]:
#warning
import logging
logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
logging.getLogger("darts").setLevel(logging.ERROR)
import warnings
warnings.filterwarnings("ignore")
# Silence PyTorch Lightning logs
pl_logger = logging.getLogger("pytorch_lightning")
pl_logger.setLevel(logging.WARNING)
In [8]:
# Install the darts library
!pip install darts -q
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.0/56.0 kB 2.5 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 46.3/46.3 kB 3.7 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 21.7 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 200.6/200.6 kB 21.3 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 823.1/823.1 kB 63.0 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 354.4/354.4 kB 31.9 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.7/101.7 kB 11.1 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 363.4/363.4 MB 3.0 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.8/13.8 MB 120.1 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.6/24.6 MB 96.0 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 883.7/883.7 kB 57.9 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 1.7 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 211.5/211.5 MB 11.6 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.3/56.3 MB 43.3 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 127.9/127.9 MB 19.2 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 207.5/207.5 MB 4.6 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 21.1/21.1 MB 78.9 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 285.8/285.8 kB 27.0 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 278.2/278.2 kB 22.2 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 962.5/962.5 kB 62.4 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 42.2/42.2 kB 3.8 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.3/62.3 kB 6.1 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 135.3/135.3 kB 13.3 MB/s eta 0:00:00
In [9]:
# Standard Libraries
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import month_plot, quarter_plot
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import numpy as np
from sklearn.model_selection import ParameterGrid
# Darts functions
from darts.timeseries import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel
Preprocess Data¶
In [10]:
df = pd.read_csv('Online Retail.csv')
df
Out[10]:
| InvoiceNo | StockCode | Description | Quantity | InvoiceDate | UnitPrice | CustomerID | Country | |
|---|---|---|---|---|---|---|---|---|
| 0 | 536365 | 85123A | WHITE HANGING HEART T-LIGHT HOLDER | 6 | 12/1/2010 8:26 | 2.55 | 17850.0 | United Kingdom |
| 1 | 536365 | 71053 | WHITE METAL LANTERN | 6 | 12/1/2010 8:26 | 3.39 | 17850.0 | United Kingdom |
| 2 | 536365 | 84406B | CREAM CUPID HEARTS COAT HANGER | 8 | 12/1/2010 8:26 | 2.75 | 17850.0 | United Kingdom |
| 3 | 536365 | 84029G | KNITTED UNION FLAG HOT WATER BOTTLE | 6 | 12/1/2010 8:26 | 3.39 | 17850.0 | United Kingdom |
| 4 | 536365 | 84029E | RED WOOLLY HOTTIE WHITE HEART. | 6 | 12/1/2010 8:26 | 3.39 | 17850.0 | United Kingdom |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 541904 | 581587 | 22613 | PACK OF 20 SPACEBOY NAPKINS | 12 | 12/9/2011 12:50 | 0.85 | 12680.0 | France |
| 541905 | 581587 | 22899 | CHILDREN'S APRON DOLLY GIRL | 6 | 12/9/2011 12:50 | 2.10 | 12680.0 | France |
| 541906 | 581587 | 23254 | CHILDRENS CUTLERY DOLLY GIRL | 4 | 12/9/2011 12:50 | 4.15 | 12680.0 | France |
| 541907 | 581587 | 23255 | CHILDRENS CUTLERY CIRCUS PARADE | 4 | 12/9/2011 12:50 | 4.15 | 12680.0 | France |
| 541908 | 581587 | 22138 | BAKING SET 9 PIECE RETROSPOT | 3 | 12/9/2011 12:50 | 4.95 | 12680.0 | France |
541909 rows × 8 columns
In [11]:
# Stockcode to look up the stock code description
stockcode_map = (
df[['StockCode', 'Description']]
.dropna()
.drop_duplicates(subset='StockCode') # Ensure one description per code
.set_index('StockCode')['Description']
.astype(str)
.to_dict()
)
#the lookup function
def stockcode(code):
"""
Lookup item name (description) by StockCode.
Parameters:
- code (str or int): the stock code to look up
Returns:
- description (str): the item name if found, else "Not found"
"""
code = str(code).strip().upper()
return stockcode_map.get(code, "Not found")
In [12]:
# Load and parse datetime
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
# Extract just the date (ignore hour/minute)
df['Date'] = df['InvoiceDate'].dt.date
# Group by Date and StockCode
grouped = df.groupby(['Date', 'StockCode'])['Quantity'].sum().reset_index()
# Pivot to get one time series per product
pivot_df = grouped.pivot(index='Date', columns='StockCode', values='Quantity').fillna(0)
#Sort by date
pivot_df = pivot_df.sort_index()
pivot_df.index = pd.to_datetime(pivot_df.index)
pivot_df = pivot_df.asfreq('D', fill_value=0)
In [13]:
pivot_df
Out[13]:
| StockCode | 10002 | 10080 | 10120 | 10123C | 10123G | 10124A | 10124G | 10125 | 10133 | 10134 | ... | M | PADS | POST | S | gift_0001_10 | gift_0001_20 | gift_0001_30 | gift_0001_40 | gift_0001_50 | m |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2010-12-01 | 60.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 5.0 | 0.0 | ... | 2.0 | 0.0 | 5.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2010-12-02 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2010-12-03 | 8.0 | 0.0 | 3.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 3.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2010-12-04 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2010-12-05 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.0 | 0.0 | ... | 56.0 | 0.0 | 23.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2011-12-05 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 802.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2011-12-06 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 5.0 | 0.0 | 24.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2011-12-07 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | -2.0 | 0.0 | 21.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2011-12-08 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 3.0 | 0.0 | 12.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2011-12-09 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20.0 | 0.0 | 0.0 | ... | -1.0 | 0.0 | 9.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
374 rows × 4070 columns
In [14]:
print(stockcode('10124G'))
ARMY CAMO BOOKCOVER TAPE
In [15]:
pivot_df.isnull().sum()
Out[15]:
| 0 | |
|---|---|
| StockCode | |
| 10002 | 0 |
| 10080 | 0 |
| 10120 | 0 |
| 10123C | 0 |
| 10123G | 0 |
| ... | ... |
| gift_0001_20 | 0 |
| gift_0001_30 | 0 |
| gift_0001_40 | 0 |
| gift_0001_50 | 0 |
| m | 0 |
4070 rows × 1 columns
In [16]:
len(pivot_df)
Out[16]:
374
In [17]:
# Simple plots
plt.figure(figsize = (18,6))
pivot_df.plot(legend=False)
plt.show()
<Figure size 1800x600 with 0 Axes>
In [18]:
# Calculate total quantity sold per StockCode (i.e., sum of each column)
total_quantity = pivot_df.sum()
# Sort in ascending order to see the refunds
quantity = total_quantity.sort_values(ascending=True)
# Show top 10
quantity.head(10)
Out[18]:
| 0 | |
|---|---|
| StockCode | |
| 23005 | -14418.0 |
| 23003 | -8516.0 |
| 72140F | -5368.0 |
| 79323W | -4838.0 |
| 79323LP | -2618.0 |
| 72732 | -2472.0 |
| 23059 | -2376.0 |
| 79323P | -2007.0 |
| 79323B | -1671.0 |
| 22618 | -1632.0 |
In [19]:
print(stockcode('23005'))
TRAVEL CARD WALLET I LOVE LONDON
Notes: We only want sales forecasting therefore we only count the quantity of product being sold. Hence, we only takes the positive values and remove the negative (refund) one.¶
In [20]:
df = pivot_df.copy()
df[df < 0] = 0
Explanatory Data Analysis¶
In [21]:
# Simple plots
plt.figure(figsize = (18,6))
df.plot(legend=False)
plt.show()
<Figure size 1800x600 with 0 Axes>
In [22]:
# Make sure the index is datetime
df.index = pd.to_datetime(df.index)
# Compute average sales per weekday (0 = Monday, 6 = Sunday)
daily_means = df.groupby(df.index.dayofweek).mean()
# Plot
plt.figure(figsize=(18,6))
plt.plot(daily_means, marker='o', alpha=0.6)
plt.grid()
plt.title("Weekly Seasonality of Sales")
plt.xlabel("Day of Week (0 = Monday)")
plt.ylabel("Quantity Sold")
plt.xticks(ticks=range(7), labels=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
plt.show()
In [23]:
# Make sure the index is datetime
df.index = pd.to_datetime(df.index)
# Compute average sales per weekday (0 = Monday, 6 = Sunday)
daily_means = df.groupby(df.index.dayofweek).mean()
# Standardize the means (z-score per column)
daily_means_std = (daily_means - daily_means.mean()) / daily_means.std()
# Plot
plt.figure(figsize=(18,6))
plt.plot(daily_means_std, marker='o', alpha=0.6)
plt.grid()
plt.title("Weekly Seasonality of Sales (Standardized)")
plt.xlabel("Day of Week (0 = Monday)")
plt.ylabel("Standardized Mean Quantity Sold")
plt.xticks(ticks=range(7), labels=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
plt.show()
In [24]:
day_sums = df.groupby(df.index.dayofweek).sum()
day_sums_total = day_sums.sum(axis=1)
plt.figure(figsize=(10,4))
day_sums_total.plot(kind='bar')
plt.title("Total Quantity Sold per Day of Week")
plt.xticks(ticks=range(7), labels=['Mon','Tue','Wed','Thu','Fri','Sat','Sun'], rotation=0)
plt.ylabel("Total Quantity Sold")
plt.grid(True)
plt.show()
In [25]:
# Check if Saturday has real transactions
df[df.index.dayofweek == 5].sum().sum() # 5 = Saturday
Out[25]:
np.float64(0.0)
Saturday only exist because we do asfreq('D', fill_value=0) meanwhile in the real dataset most probably at saturday they are not operating (invoices or recorded transactions might only be generated on weekdays or when staff process them).
In [26]:
# Make sure the index is datetime
df.index = pd.to_datetime(df.index)
# Compute average sales per month (1 = January, ..., 12 = December)
monthly_means = df.groupby(df.index.month).mean()
# Plot
plt.figure(figsize=(18,6))
plt.plot(monthly_means, marker='o', alpha=0.6)
plt.grid()
plt.title("Monthly Seasonality of Sales")
plt.xlabel("Month")
plt.ylabel("Average Quantity Sold")
plt.xticks(ticks=range(1, 13), labels=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.show()
In [27]:
# Ensure the index is datetime
df.index = pd.to_datetime(df.index)
#Compute average sales per month (1 = Jan, ..., 12 = Dec)
monthly_means = df.groupby(df.index.month).mean()
# Standardize (Z-score) across months, per product
monthly_means_std = (monthly_means - monthly_means.mean()) / monthly_means.std()
# Plot standardized values
plt.figure(figsize=(18,6))
plt.plot(monthly_means_std, marker='o', alpha=0.6)
plt.grid()
plt.title("Standardized Monthly Seasonality of Sales")
plt.xlabel("Month")
plt.ylabel("Standardized Quantity (Z-score)")
plt.xticks(ticks=range(1, 13), labels=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.show()
In [28]:
import seaborn as sns
plt.figure(figsize=(12, 8))
sns.heatmap(monthly_means_std.T, cmap='coolwarm', center=0, cbar_kws={'label': 'Z-score'})
plt.title("Heatmap of Standardized Monthly Sales Seasonality")
plt.xlabel("Month")
plt.ylabel("Product (StockCode)")
plt.xticks(ticks=np.arange(12), labels=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'], rotation=0)
plt.show()
LSTM Preparation¶
In [29]:
series = TimeSeries.from_dataframe(df)
In [30]:
# Create time series instances
# Month
month_series = datetime_attribute_timeseries(series,
attribute = "month",
one_hot = True)
# Weekday
weekday_series = datetime_attribute_timeseries(series,
attribute = "weekday",
one_hot = True)
In [31]:
# Import the Scaler class and initialize it
scaler1 = Scaler()
In [32]:
# Apply the scaler to the time series
series_transformed = scaler1.fit_transform(series)
In [33]:
# Stack the weekday and hour covariates into a single TimeSeries object
covariates_transformed = weekday_series.stack(month_series)
LSTM Model¶
In [34]:
df
Out[34]:
| StockCode | 10002 | 10080 | 10120 | 10123C | 10123G | 10124A | 10124G | 10125 | 10133 | 10134 | ... | M | PADS | POST | S | gift_0001_10 | gift_0001_20 | gift_0001_30 | gift_0001_40 | gift_0001_50 | m |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2010-12-01 | 60.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 5.0 | 0.0 | ... | 2.0 | 0.0 | 5.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2010-12-02 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2010-12-03 | 8.0 | 0.0 | 3.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 3.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2010-12-04 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2010-12-05 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.0 | 0.0 | ... | 56.0 | 0.0 | 23.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2011-12-05 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 802.0 | 0.0 | 15.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2011-12-06 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 5.0 | 0.0 | 24.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2011-12-07 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 21.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2011-12-08 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 3.0 | 0.0 | 12.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2011-12-09 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 9.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
374 rows × 4070 columns
In [35]:
# Define the input chunk length, forecasting horizon, and training length
forecasting_horizon = 7 # forecasting the next 7 days
input_chunk_length = 28 # Using past 28 days as input (~a month)
training_length = forecasting_horizon + input_chunk_length
In [36]:
# LSTM model
model = RNNModel(model = "LSTM", # Specify LSTM model
hidden_dim = 20, # Number of hidden units
n_rnn_layers = 2, # Number of RNN layers
dropout = 0.1, # Dropout rate
n_epochs = 10, # Number of training epochs
optimizer_kwargs = {"lr": 0.003}, # Learning rate for the optimizer
random_state = 1502, # Random seed for reproducibility
training_length = training_length, # Length of training data
input_chunk_length = input_chunk_length, # Length of input chunks
#pl_trainer_kwargs = {"accelerator": "cpu"} # Use CPU for training
pl_trainer_kwargs = {"accelerator": "gpu",
"devices": [0]} # Specify GPU device
)
# Fit the model with the transformed time series data and covariates
model.fit(series_transformed, future_covariates = covariates_transformed)
Training: | | 0/? [00:00<?, ?it/s]
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.
Out[36]:
RNNModel(model=LSTM, hidden_dim=20, n_rnn_layers=2, dropout=0.1, training_length=35, n_epochs=10, optimizer_kwargs={'lr': 0.003}, random_state=1502, input_chunk_length=28, pl_trainer_kwargs={'accelerator': 'gpu', 'devices': [0]})
Cross Validation¶
In [37]:
# Perform cross-validation using a rolling forecasting window
cv = model.historical_forecasts(series = series_transformed,
future_covariates = covariates_transformed,
start = df.shape[0] - 200, # Start point for forecasting window
forecast_horizon = forecasting_horizon, # Forecast horizon
stride = 7, # Stride for rolling window
retrain = True, # Retrain model at each step
last_points_only = False) # Use all points for forecasting
Training: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
Training: | | 0/? [00:00<?, ?it/s]
In [38]:
rmse_cv = []
# Convert full series to pandas once
series_df = pd.DataFrame(series.values(), index=series.time_index)
for i in range(len(cv)):
# Inverse transform the forecasted TimeSeries
forecast_ts = scaler1.inverse_transform(cv[i])
# Convert to pandas DataFrame using values + time_index
forecast_df = pd.DataFrame(forecast_ts.values(), index=forecast_ts.time_index)
# Slice the actual series to the same date range
actual_df = series_df.loc[forecast_df.index]
# Compute RMSE
error_cv = np.sqrt(mean_squared_error(actual_df, forecast_df))
# Store the result
rmse_cv.append(error_cv)
# Print the mean RMSE across all cross-validation folds
print(np.mean(rmse_cv))
27.912901542614126
Parameter Tuning¶
In [40]:
# Define the parameter grid for tuning
param_grid = {'n_rnn_layers': [2,4],
'hidden_dim': [10,20],
'dropout': [0.1, 0.2],
'n_epochs': [10, 20],
'lr': [0.003],
'training_length': [35],
'input_chunk_length': [28]}
# Generate all combinations of the parameters
grid = ParameterGrid(param_grid)
# Get the total number of parameter combinations
len(list(grid))
Out[40]:
16
In [41]:
# Initialize a list to store RMSE values for each parameter combination
rmse = []
# Iterate over each parameter combination in the grid
for params in grid:
# Build the LSTM model with the current set of parameters
model = RNNModel(model = "LSTM", # Specify LSTM model
hidden_dim = params['hidden_dim'], # Number of hidden units
n_rnn_layers = params['n_rnn_layers'], # Number of RNN layers
dropout = params['dropout'], # Dropout rate
n_epochs = params['n_epochs'], # Number of training epochs
optimizer_kwargs = {"lr": params['lr']}, # Learning rate for the optimizer
random_state = 1502, # Random seed for reproducibility
training_length = params['training_length'], # Length of training data
input_chunk_length = params['input_chunk_length'], # Length of input chunks
# pl_trainer_kwargs = {"accelerator": "cpu"}
pl_trainer_kwargs = {"accelerator": "gpu",
"devices": [0],
"enable_progress_bar": False,
"logger": False,
"enable_model_summary": False}
)
# Fit the model with the transformed time series data and covariates
model.fit(series_transformed, future_covariates = covariates_transformed)
# Perform cross-validation with a rolling forecasting window
cv = model.historical_forecasts(series = series_transformed,
future_covariates = covariates_transformed,
start = df.shape[0] - 200, # Start point for the forecasting window
forecast_horizon = forecasting_horizon, # Forecast horizon
stride = 7, # Stride for the rolling window
retrain = True, # Retrain model at each step
last_points_only = False) # Use all points for forecasting
rmse_cv = []
# Iterate over each forecast result from cross-validation
for i in range(len(cv)):
# Convert forecasted values back to the original scale
ts = scaler1.inverse_transform(cv[i])
predictions = pd.DataFrame(ts.values(), index=ts.time_index, columns=ts.components)
# Determine the range of dates for actual values
start = predictions.index.min()
end = predictions.index.max()
# Extract the actual values from the original dataset
actuals = df.loc[start:end]
# Calculate the RMSE for the current cross-validation fold
error_cv = np.sqrt(mean_squared_error(actuals, predictions))
rmse_cv.append(error_cv)
# Calculate and store the mean RMSE for the current parameter combination
error = np.mean(rmse_cv)
rmse.append(error)
In [42]:
# Retrieve the best params and put in in a dataframe
tuning_results = pd.DataFrame(grid)
tuning_results['rmse'] = rmse
tuning_results
Out[42]:
| dropout | hidden_dim | input_chunk_length | lr | n_epochs | n_rnn_layers | training_length | rmse | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0.1 | 10 | 28 | 0.003 | 10 | 2 | 35 | 28.202768 |
| 1 | 0.1 | 10 | 28 | 0.003 | 10 | 4 | 35 | 28.178899 |
| 2 | 0.1 | 10 | 28 | 0.003 | 20 | 2 | 35 | 27.879024 |
| 3 | 0.1 | 10 | 28 | 0.003 | 20 | 4 | 35 | 27.874018 |
| 4 | 0.1 | 20 | 28 | 0.003 | 10 | 2 | 35 | 27.912902 |
| 5 | 0.1 | 20 | 28 | 0.003 | 10 | 4 | 35 | 27.903459 |
| 6 | 0.1 | 20 | 28 | 0.003 | 20 | 2 | 35 | 27.710273 |
| 7 | 0.1 | 20 | 28 | 0.003 | 20 | 4 | 35 | 27.845599 |
| 8 | 0.2 | 10 | 28 | 0.003 | 10 | 2 | 35 | 28.193481 |
| 9 | 0.2 | 10 | 28 | 0.003 | 10 | 4 | 35 | 28.174830 |
| 10 | 0.2 | 10 | 28 | 0.003 | 20 | 2 | 35 | 27.879156 |
| 11 | 0.2 | 10 | 28 | 0.003 | 20 | 4 | 35 | 27.873047 |
| 12 | 0.2 | 20 | 28 | 0.003 | 10 | 2 | 35 | 27.912294 |
| 13 | 0.2 | 20 | 28 | 0.003 | 10 | 4 | 35 | 27.904480 |
| 14 | 0.2 | 20 | 28 | 0.003 | 20 | 2 | 35 | 27.758939 |
| 15 | 0.2 | 20 | 28 | 0.003 | 20 | 4 | 35 | 27.857945 |
In [43]:
# Identify the parameter set(s) with the lowest RMSE and export it to a CSV file
best_params = tuning_results[tuning_results['rmse'] == tuning_results['rmse'].min()]
best_params = best_params.iloc[0,:]
best_params.to_csv("best_params_round1.csv")
Parameter Tuning 2¶
In [44]:
# Load the best parameters from the CSV file
params_round1 = pd.read_csv("best_params_round1.csv", index_col = 0)
params_round1
Out[44]:
| 6 | |
|---|---|
| dropout | 0.100000 |
| hidden_dim | 20.000000 |
| input_chunk_length | 28.000000 |
| lr | 0.003000 |
| n_epochs | 20.000000 |
| n_rnn_layers | 2.000000 |
| training_length | 35.000000 |
| rmse | 27.710273 |
In [55]:
# Isolate and convert the three parameters from the best parameter set
n_rnn_layers = int(params_round1.loc["n_rnn_layers"].iloc[0]) # Extract and convert the number of RNN layers
hidden_dim = int(params_round1.loc["hidden_dim"].iloc[0]) # Extract and convert the number of hidden units
dropout = params_round1.loc["dropout"][0] # Extract the dropout rate
n_epochs = int(params_round1.loc["n_epochs"][0])
n_epochs # Display the dropout rate
Out[55]:
20
In [63]:
# Define the parameter grid for tuning
param_grid = {'n_rnn_layers': [n_rnn_layers],
'hidden_dim': [hidden_dim],
'dropout': [dropout],
'n_epochs': [n_epochs ],
'lr': [0.0005, 0.003],
'training_length': [35, 40],
'input_chunk_length': [28, 20, 33]}
# Generate all combinations of the parameters
grid = ParameterGrid(param_grid)
# Get the total number of parameter combinations
len(list(grid))
Out[63]:
12
In [64]:
# Initialize a list to store RMSE values for each parameter combination
rmse = []
# Iterate over each parameter combination in the grid
for params in grid:
# Build the LSTM model with the current set of parameters
model = RNNModel(model = "LSTM", # Specify LSTM model
hidden_dim = params['hidden_dim'], # Number of hidden units
n_rnn_layers = params['n_rnn_layers'], # Number of RNN layers
dropout = params['dropout'], # Dropout rate
n_epochs = params['n_epochs'], # Number of training epochs
optimizer_kwargs = {"lr": params['lr']}, # Learning rate for the optimizer
random_state = 1502, # Random seed for reproducibility
training_length = params['training_length'], # Length of training data
input_chunk_length = params['input_chunk_length'], # Length of input chunks
# pl_trainer_kwargs = {"accelerator": "cpu"}
pl_trainer_kwargs = {"accelerator": "gpu",
"devices": [0],
"enable_progress_bar": False,
"logger": False,
"enable_model_summary": False}
)
# Fit the model with the transformed time series data and covariates
model.fit(series_transformed, future_covariates = covariates_transformed)
# Perform cross-validation with a rolling forecasting window
cv = model.historical_forecasts(series = series_transformed,
future_covariates = covariates_transformed,
start = df.shape[0] - 200, # Start point for the forecasting window
forecast_horizon = forecasting_horizon, # Forecast horizon
stride = 7, # Stride for the rolling window
retrain = True, # Retrain model at each step
last_points_only = False) # Use all points for forecasting
rmse_cv = []
# Iterate over each forecast result from cross-validation
for i in range(len(cv)):
# Convert forecasted values back to the original scale
ts = scaler1.inverse_transform(cv[i])
predictions = pd.DataFrame(ts.values(), index=ts.time_index, columns=ts.components)
# Determine the range of dates for actual values
start = predictions.index.min()
end = predictions.index.max()
# Extract the actual values from the original dataset
actuals = df.loc[start:end]
# Calculate the RMSE for the current cross-validation fold
error_cv = np.sqrt(mean_squared_error(actuals, predictions))
rmse_cv.append(error_cv)
# Calculate and store the mean RMSE for the current parameter combination
error = np.mean(rmse_cv)
rmse.append(error)
In [65]:
# Retrieve the best params and put in in a dataframe
tuning_results = pd.DataFrame(grid)
tuning_results['rmse'] = rmse
tuning_results
Out[65]:
| dropout | hidden_dim | input_chunk_length | lr | n_epochs | n_rnn_layers | training_length | rmse | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0.1 | 20 | 28 | 0.0005 | 20 | 2 | 35 | 29.111854 |
| 1 | 0.1 | 20 | 28 | 0.0005 | 20 | 2 | 40 | 29.183238 |
| 2 | 0.1 | 20 | 28 | 0.0030 | 20 | 2 | 35 | 27.710273 |
| 3 | 0.1 | 20 | 28 | 0.0030 | 20 | 2 | 40 | 27.728238 |
| 4 | 0.1 | 20 | 20 | 0.0005 | 20 | 2 | 35 | 29.111747 |
| 5 | 0.1 | 20 | 20 | 0.0005 | 20 | 2 | 40 | 29.183085 |
| 6 | 0.1 | 20 | 20 | 0.0030 | 20 | 2 | 35 | 27.710301 |
| 7 | 0.1 | 20 | 20 | 0.0030 | 20 | 2 | 40 | 27.728255 |
| 8 | 0.1 | 20 | 33 | 0.0005 | 20 | 2 | 35 | 29.111857 |
| 9 | 0.1 | 20 | 33 | 0.0005 | 20 | 2 | 40 | 29.183243 |
| 10 | 0.1 | 20 | 33 | 0.0030 | 20 | 2 | 35 | 27.710269 |
| 11 | 0.1 | 20 | 33 | 0.0030 | 20 | 2 | 40 | 27.728237 |
In [66]:
# Identify the parameter set(s) with the lowest RMSE and export it to a CSV file
best_params = tuning_results[tuning_results['rmse'] == tuning_results['rmse'].min()]
best_params = best_params.iloc[0,:]
best_params.to_csv("best_params_round2.csv")
Predict the future¶
In [67]:
# Load the best parameters from the CSV file
params_round2 = pd.read_csv("best_params_round2.csv", index_col = 0)
params_round2
Out[67]:
| 10 | |
|---|---|
| dropout | 0.100000 |
| hidden_dim | 20.000000 |
| input_chunk_length | 33.000000 |
| lr | 0.003000 |
| n_epochs | 20.000000 |
| n_rnn_layers | 2.000000 |
| training_length | 35.000000 |
| rmse | 27.710269 |
In [68]:
# Isolate and convert all parameters from the best parameter set
n_rnn_layers = int(params_round2.loc["n_rnn_layers"].iloc[0])
hidden_dim = int(params_round2.loc["hidden_dim"].iloc[0])
dropout = params_round2.loc["dropout"].iloc[0]
input_chunk_length = int(params_round2.loc["input_chunk_length"].iloc[0])
lr = params_round2.loc["lr"].iloc[0]
n_epochs = int(params_round2.loc["n_epochs"].iloc[0])
training_length = int(params_round2.loc["training_length"].iloc[0])
In [72]:
# Fix the start time to match the end of existing covariates
future_time_index = pd.date_range(
start = covariates_transformed.end_time() + pd.Timedelta(days=1),
periods = 48,
freq = "D"
)
# Create future covariates (month and weekday)
month_series = datetime_attribute_timeseries(future_time_index, attribute="month", one_hot=True)
weekday_series = datetime_attribute_timeseries(future_time_index, attribute="weekday", one_hot=True)
# Stack them
future_covariates = month_series.stack(weekday_series)
# Now safely append
covariates_transformed = covariates_transformed.append(future_covariates)
In [74]:
# LSTM model
model = RNNModel(model = "LSTM", # Specify the LSTM model
hidden_dim = hidden_dim, # Number of hidden units
n_rnn_layers = n_rnn_layers, # Number of RNN layers
dropout = dropout, # Dropout rate
n_epochs = n_epochs, # Number of training epochs
optimizer_kwargs = {"lr": lr}, # Learning rate for the optimizer
random_state = 1502, # Random seed for reproducibility
training_length = training_length, # Length of training data
input_chunk_length = input_chunk_length, # Length of input chunks
#pl_trainer_kwargs = {"accelerator": "cpu"} # Use CPU for training
pl_trainer_kwargs = {"accelerator": "gpu"}
# "devices": [0]}
)
# Fit the model with the transformed time series data and covariates
model.fit(series_transformed, future_covariates = covariates_transformed)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] INFO:pytorch_lightning.callbacks.model_summary: | Name | Type | Params | Mode ------------------------------------------------------------- 0 | criterion | MSELoss | 0 | train 1 | train_criterion | MSELoss | 0 | train 2 | val_criterion | MSELoss | 0 | train 3 | train_metrics | MetricCollection | 0 | train 4 | val_metrics | MetricCollection | 0 | train 5 | rnn | LSTM | 332 K | train 6 | V | Linear | 85.5 K | train ------------------------------------------------------------- 417 K Trainable params 0 Non-trainable params 417 K Total params 1.671 Total estimated model params size (MB) 7 Modules in train mode 0 Modules in eval mode
Training: | | 0/? [00:00<?, ?it/s]
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.
Out[74]:
RNNModel(model=LSTM, hidden_dim=20, n_rnn_layers=2, dropout=0.1, training_length=35, n_epochs=20, optimizer_kwargs={'lr': np.float64(0.003)}, random_state=1502, input_chunk_length=33, pl_trainer_kwargs={'accelerator': 'gpu'})
In [79]:
predictions = model.predict(n=48, future_covariates=covariates_transformed)
predictions_ts = scaler1.inverse_transform(predictions)
# Convert TimeSeries to pandas DataFrame
predictions_df = pd.DataFrame(
predictions_ts.values(),
index=predictions_ts.time_index,
columns=predictions_ts.components
)
# Display
predictions_df.head()
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting: | | 0/? [00:00<?, ?it/s]
Out[79]:
| component | 10002 | 10080 | 10120 | 10123C | 10123G | 10124A | 10124G | 10125 | 10133 | 10134 | ... | M | PADS | POST | S | gift_0001_10 | gift_0001_20 | gift_0001_30 | gift_0001_40 | gift_0001_50 | m |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2011-12-10 | -0.512356 | 1.431048 | 0.675462 | 0.000227 | 0.006269 | 0.028843 | 0.066157 | 3.458318 | 7.645293 | 0.001089 | ... | 42.725739 | 0.041944 | 25.355469 | 0.001595 | 0.172427 | 0.076965 | 0.117738 | -0.001052 | 0.001778 | 0.000625 |
| 2011-12-11 | -0.504629 | 1.354150 | 0.604392 | -0.002392 | 0.003145 | 0.025220 | 0.057554 | 3.444736 | 7.694609 | -0.002214 | ... | 39.873820 | 0.032464 | 23.969211 | 0.005702 | 0.169401 | 0.069823 | 0.198776 | -0.000210 | 0.006801 | -0.001064 |
| 2011-12-12 | -0.121571 | 1.359752 | 0.576028 | -0.002091 | 0.001362 | 0.026992 | 0.059539 | 3.461042 | 7.721149 | -0.001916 | ... | 35.045198 | 0.029858 | 20.497963 | 0.006981 | 0.183092 | 0.072105 | 0.226905 | 0.000294 | 0.007774 | -0.001512 |
| 2011-12-13 | -0.112885 | 1.299037 | 0.555186 | -0.001126 | 0.000237 | 0.023596 | 0.057171 | 3.601968 | 7.828580 | -0.002063 | ... | 33.532194 | 0.027342 | 19.487278 | 0.008263 | 0.189960 | 0.067018 | 0.247886 | 0.000902 | 0.009331 | -0.001202 |
| 2011-12-14 | 0.236566 | 1.412009 | 0.540817 | 0.000820 | -0.000126 | 0.027119 | 0.061303 | 3.490754 | 7.761026 | -0.001335 | ... | 30.987324 | 0.024944 | 17.876522 | 0.007895 | 0.185502 | 0.066816 | 0.251030 | 0.000788 | 0.009268 | -0.001684 |
5 rows × 4070 columns